

***************************************************************
******** The Bridge Project, Final Data Cleaning File *********
******* Butikofer, Loken, and Willen (2022) *******************
***************************************************************
* Date: Feb 16, 2022
* Stata version: 15.1
***************************************************************
version 15
set seed 888888
log using BLW_Bridge_2022_Cleaning, replace

***************************************************************
* Setting directory. Note: Replace X with actual local path directory prior to running. 
	global path_base 	"X\Temp"
	global output 		"X\Results"
	global path_adv 	"X\Analysis Data"
***************************************************************

*************************************	
* Importing Swedish background data *
*************************************
{
    forvalues j=1(1)2 {
		
	forvalues i=1997(1)2014{
		odbc load, exec("select * from individ_`i'")dsn("P0950") clear
		gen year = `i'
		destring Kon, force replace
		keep if Kon == `j'
		destring Kommun, force replace
		keep if Alder<65
		drop if Alder<18
		save "$path_base\Individ_`i'_`j'", replace
		}
	

* Note - This data importation step is performed in year chunks and seperately by gender. This is due to memory constraints on the local server. You may be able to perform this in one step depending on server restrictions.
	use "$path_base\Individ_1997_`j'", clear
	
	forvalues i=1998(1)2010{
		append using "$path_base\Individ_`i'_`j'"
		}
		
	capture drop Forsamling StudDeltTyp KU1Ink KU1YrkStalln KU1AstKommun KU1AstLan KU1InstKod KU1SektorKod KU1AstSNI92 KU2Ink KU2YrkStalln KU2InstKod KU2SektorKod KU2AstSNI92 KU3Ink KU3YrkStalln KU3InstKod KU3SektorKod KU3AstSNI92 FInk KapInk KU1InstKod6 KU2InstKod6 KU3InstKod6 KU1InstKod7 KU2InstKod7 KU3InstKod7 KU1Ssyk4 KU1SsykAr KU2AstSNI2002 KU2Ssyk4 KU2SsykAr KU3AstSNI2002 KU1AstSNI2007 KU2AstKommun KU2AstLan KU2AstSNI2007 KU3AstKommun KU3AstLan KU3AstSNI2007 Forsamling StudDeltTyp KU1Ink KU1YrkStalln KU1AstKommun KU1AstLan KU1InstKod KU1SektorKod KU1AstSNI92 KU2Ink KU2YrkStalln KU2InstKod KU2SektorKod KU2AstSNI92 KU3Ink KU3YrkStalln KU3InstKod KU3SektorKod KU3AstSNI92 FInk KapInk KU1InstKod6 KU2InstKod6 KU3InstKod6 KU1InstKod7 KU2InstKod7 KU3InstKod7 KU1Ssyk4 KU1SsykAr KU2AstSNI2002 KU2Ssyk4 KU2SsykAr KU3AstSNI2002 KU1AstSNI2007 KU2AstKommun KU2AstLan KU2AstSNI2007 KU3AstLan
	
	forvalues i=2011(1)2013{
		append using "$path_base\Individ_`i'_`j'"
		}
		
	drop AntFlyttTot AntFlyttKommun CivilAntAr InstKod InstKod6 InstKod7 Ssyk4 Ssyk3 SsykAr SsykStatus InkFNettoA StudDeltTyp KU2Ink KU2YrkStalln KU2AstKommun KU2AstLan KU2InstKod7 KU2SektorKod KU2AstSNI2007 KU2Ssyk4 KU2SsykAr KU3Ink KU3YrkStalln KU3AstKommun KU3AstLan KU3InstKod7 KU3SektorKod KU3AstSNI2007 KapInk
	
	compress
	
	forvalues i=2014(1)2014{
		append using "$path_base\Individ_`i'_`j'"
		}
		
	compress

	gen obs = 1
	save "$path_base\Individ1997_2014_`j'", replace
	}
	
	* Combining all the Swedish background data and dropping variables that are not being used
	use "$path_base\Individ1997_2014_1", clear
	capture drop CivilAntAr YrkStalln AntAns AstLan AntalSys SektorKod InstKod7 Ssyk3 SsykAr Forsamling StudDeltTyp Ssyk3_2012 FamStF AstSNI92 InstKod10 InstKod InstKod6 InkFNettoA SsykStatus
	drop KapInk InkFNettoA KU2Ink KU2YrkStalln KU2AstKommun KU2AstLan KU2InstKod10 KU2SektorKod KU2AstSNI2007 KU2Ssyk4_2012 KU2SsykAr KU3Ink KU3YrkStalln KU3AstKommun KU3AstLan KU3InstKod10 KU3SektorKod KU3AstSNI2007 CivilAntAr AntFlyttTot AntFlyttKommun DekLon AntalSys
	drop Barn18plus FamStF FamTypF YrkStalln AntAns AstKommun AstLan SektorKod ForvErs AstSNI2002 KU1AstSNI2002 Barn18_19 Barn20plus AstSNI2007 KU1Ink KU1YrkStalln KU1AstKommun KU1AstLan KU1InstKod7 KU1SektorKod KU1AstSNI2007 KU1Ssyk4 KU1SsykAr StudDeltTyp InstKod10 Ssyk3_2012 SsykAr KU1InstKod10 KU1Ssyk4_2012
	save, replace
	use "$path_base\Individ1997_2014_2", clear
	capture drop CivilAntAr YrkStalln AntAns AstLan AntalSys SektorKod InstKod7 Ssyk3 SsykAr Forsamling StudDeltTyp Ssyk3_2012 FamStF AstSNI92 InstKod10 InstKod InstKod6 InkFNettoA SsykStatus
	save, replace
	drop KapInk InkFNettoA KU2Ink KU2YrkStalln KU2AstKommun KU2AstLan KU2InstKod10 KU2SektorKod KU2AstSNI2007 KU2Ssyk4_2012 KU2SsykAr KU3Ink KU3YrkStalln KU3AstKommun KU3AstLan KU3InstKod10 KU3SektorKod KU3AstSNI2007 CivilAntAr AntFlyttTot AntFlyttKommun DekLon AntalSys
	drop Barn18plus FamStF FamTypF YrkStalln AntAns AstKommun AstLan SektorKod ForvErs AstSNI2002 KU1AstSNI2002 Barn18_19 Barn20plus AstSNI2007 KU1Ink KU1YrkStalln KU1AstKommun KU1AstLan KU1InstKod7 KU1SektorKod KU1AstSNI2007 KU1Ssyk4 KU1SsykAr StudDeltTyp InstKod10 Ssyk3_2012 SsykAr KU1InstKod10 KU1Ssyk4_2012
	append using "$path_base\Individ1997_2014_1"
	
	save "$path_base\Individ1997_2014", replace
}
	
**********************************************
* Importing Sweden-Denmark cross-border data *
**********************************************
{
	forvalues i=1997(1)2014{
		odbc load, exec("select * from grsSVDK`i'")dsn("P0950") clear
		gen year = `i'
			if `i'<2008 {
			destring NovMark_Arb, force replace
			}
			if `i'>2003 {
			drop FtgStl
			}
			if `i'>2004 {
			drop Utb_Niva
			}
			if `i'>2006 {
			destring SumLon_Arb_SEK, force replace 
			drop Lon_Arb_SEK
			}
		save "$path_base\SVDK_`i'", replace
		}

	* Combining all the Sweden-Denmark cross-border data
	use "$path_base\SVDK_1997", clear
		forvalues i=1998(1)2014{
		append using "$path_base\SVDK_`i'"
		}
	compress
	ren P0950_Lopnr_Personnr P0950_LopNr_PersonNr
	save "$path_base\SVDK1997_2014", replace 
}
	
***************************
* Combining the data sets *
***************************
{
	use "$path_base\Individ1997_2014", clear
	compress
	duplicates drop P0950_LopNr_PersonNr year, force 
	merge 1:1 P0950_LopNr_PersonNr year using "$path_base\SVDK1997_2014"
	drop _merge
	save "$path_base\Sample_1", replace 

* Note - This in done in two chunks due to memory constraints on the local server

	* Chunk one (Municipalities<1200)
	{
* Preparing for preliminary descriptives
	use "$path_base\Sample_1", clear 
	destring Kommun, 		force replace 
	keep if Kommun<1200
	
	destring Kon, force replace
	gen Female		=  1 if Kon == 2
	replace Female	=  0 if Kon == 1

* Generating outcome variables 
drop ExamAr Utd_KOmbi obs MedbGrEg2 SyssStatJ Forsamling SsykStatus SyssStat11	
compress

	replace 	LoneInk			= LoneInk * 100
	
	gen 		DKtotwage		= SumLon_Arb_SEK
	replace 	DKtotwage 		= 0 if DKtotwage 	== .	// Note: Those with no wage in DK will have . 
	
	gen 		SEKtotwage		= LoneInk 
	replace 	SEKtotwage		= 0 if SEKtotwage	== .	// Note: There are no missings. 

	gen 		Totwage			= SEKtotwage + DKtotwage
		
	destring 	SenInvAr, force replace
	gen 		Immigrant		= (SenInvAr!=. & SenInvAr!=0)
	replace		Immigrant		= 0 if Immigrant 	== .
	ren 		Kommun kommun 
	drop 		if Alder==.
	
	gen DKwork=(DKtotwage>0 & DKtotwage!=.)
	gen SEKwork=(SEKtotwage>0 & SEKtotwage!=.)
	
	* Stratification variables
	destring Sun2000niva, force replace
	gen 	Educ = .
	replace Educ = 1 if Sun2000niva<300 & Sun2000niva!=0 
	replace Educ = 2 if Sun2000niva>299 & Sun2000niva<400
	replace Educ = 3 if Sun2000niva>399 & Sun2000niva!=999 & Sun2000niva!=. 
		
	* Obtaining aggregated education specialization 
	gen educ_specialization = substr(Sun2000Inr,1,1)
	destring educ_specialization, force replace
	
	* Reducing Space and creating some variables 
	compress
	destring Lan, force replace		
	destring AstKom_Arb, force replace
	gen ALL=1
	gen FEMALE=(Female==1)
	gen MALE=(Female==0)
	save "$path_base\CleanedSample1", replace 
	}
	
	* Chunk 2 (Municipalities > 1199)
	{
	* Preparing for preliminary descriptives
	use 			"$path_base\Sample_1", clear 
	destring Kommun, 		force replace 
	keep if Kommun>1199
	
	destring Kon, force replace
	gen Female		=  1 if Kon == 2
	replace Female	=  0 if Kon == 1

* Generating outcome variables 
drop ExamAr Utd_KOmbi obs MedbGrEg2 SyssStatJ Forsamling SsykStatus SyssStat11	
compress

	replace 	LoneInk			= LoneInk * 100
	
	gen 		DKtotwage		= SumLon_Arb_SEK
	replace 	DKtotwage 		= 0 if DKtotwage 	== .	// Note: Those with no wage in DK will have . 
	
	gen 		SEKtotwage		= LoneInk 
	replace 	SEKtotwage		= 0 if SEKtotwage	== .	// Note: There are no missings. 

	gen 		Totwage			= SEKtotwage + DKtotwage
		
	gen 		DKwork			= (DKtotwage>0 & DKtotwage!=.)
	gen 		SEKwork			= (SEKtotwage>0 & SEKtotwage!=.)
	
	destring 	SenInvAr, force replace
	gen 		Immigrant		= (SenInvAr!=. & SenInvAr!=0)
	replace		Immigrant		= 0 if Immigrant 	== .
	ren 		Kommun kommun 
	drop 		if Alder==.
	
	* Stratification variables
	destring Sun2000niva, force replace
	gen 	Educ = .
	replace Educ = 1 if Sun2000niva<300 & Sun2000niva!=0 
	replace Educ = 2 if Sun2000niva>299 & Sun2000niva<400
	replace Educ = 3 if Sun2000niva>399 & Sun2000niva!=999 & Sun2000niva!=. 
		
	* Obtaining aggregated education specialization 
	gen educ_specialization = substr(Sun2000Inr,1,1)
	destring educ_specialization, force replace
	
	* Reducing Space and generating some variables
	compress
	destring Lan, force replace
	destring AstKom_Arb, force replace
	gen ALL=1
	gen FEMALE=(Female==1)
	gen MALE=(Female==0)
	save "$path_base\CleanedSample2", replace 
	}
	
	* Dropping irrelevant variables and combining the two chunks to final cleaned data
	drop Sun2000niva Sun2000Inr SyssStat AstSNI92 Ssyk4_2012 SEKtotwage2 NovMark_Arb Utb_Inr ALL FEMALE MALE
	save "$path_base\CleanedSample2", replace 
	use "$path_base\CleanedSample1", clear 
	drop Sun2000niva Sun2000Inr SyssStat AstSNI92 SEKtotwage2 NovMark_Arb Utb_Inr ALL FEMALE MALE
	append using "$path_base\CleanedSample2" 
	save "$path_base\CleanedSampleALL", replace 
}
 
***************************************************************************
* Incorporating additional variables ordered after R&R decision by REStat *
***************************************************************************
 {
	* Akassa, Family ID, and birthcounty
	forvalues i=1997(1)2014{
		odbc load, exec("select * from individ_`i'_20190826")dsn("P0950") clear
		gen year = `i'
		keep P0950_LopNr_PersonNr FodelseLan Akassa year P0950_LopNr_FamId
		save "$path_base\IndividExtra_`i'_`j'", replace
		}
		
		use "$path_base\IndividExtra_1997_", clear
		forvalues i=1998(1)2014 {
		append using "$path_base\IndividExtra_`i'_"
		}
		compress
		save "$path_base\IndividExtra", replace

	* Flink
		odbc load, exec("select * from koppling_barn_foraldrar")dsn("P0950") clear
		ren P0950_Lopnr_personnr P0950_LopNr_PersonNr
		duplicates drop P0950_LopNr_PersonNr, force 
		save "$path_base\FamKop", replace

	* Merging
	use "$path_base\IndividExtra", clear
	merge m:1 P0950_LopNr_PersonNr using "$path_base\FamKop"
	drop _merge
	duplicates drop P0950_LopNr_PersonNr year, force
	save "$path_base\IndividExtra", replace

	* Adding to main data
 	use "$path_base\CleanedSampleALL", clear 
 	merge 1:1 P0950_LopNr_PersonNr year using "$path_base\IndividExtra"	
	drop if _merge==2
	drop _merge
	save "$path_base\CleanedSampleALL", replace 
 }
 
******************************
* Constructing key variables *
******************************
{
	
	use "$path_base\CleanedSampleALL", clear 
	
	* i. Treatment indicator 
	gen Treat=(kommun==1280)
		 
	* ii. Defauls categories in event study regressions
	char year[omit] 1999
	char kommun[omit] 1280

	* iii. Fenerating some demographic covariates and eliminating variables that wont be used to save space 
	compress 
	
	gen married=(Civil=="G")
	replace married=1 if Civil=="G "
	gen child=(Barn0_3>0 & Barn0_3!=. | Barn4_6>0 & Barn4_6!=. | Barn7_10>0 & Barn7_10!=. | Barn11_15>0 & Barn11_15!=. | Barn16_17>0 & Barn16_17!=.)
	drop Civil Barn0_3 Barn4_6 Barn7_10 Barn11_15 Barn16_17
	drop Utd_KOmbi P0950_Lopnr_personnradfar P0950_Lopnr_personnradmor P0950_Lopnr_personnrfar P0950_Lopnr_personnrmor Kon AstSni_arb InstSekt_Arb SyssMark_Arb SenInvAr
	gen Lths=(Educ==1)
	gen Mths=(Educ==3)
	compress 
	
	* iv. Relative-time x Treatment indicators
	gen Y1997 = (year==1997 & Treat==1)
	gen Y1998 = (year==1998 & Treat==1)
	gen Y1999 = 0
	gen Y2000 = (year==2000 & Treat==1)
	gen Y2001 = (year==2001 & Treat==1)
	gen Y2002 = (year==2002 & Treat==1)
	gen Y2003 = (year==2003 & Treat==1)
	gen Y2004 = (year==2004 & Treat==1)
	gen Y2005 = (year==2005 & Treat==1)
	gen Y2006 = (year==2006 & Treat==1)
	gen Y2007 = (year==2007 & Treat==1)
	gen Y2008 = (year==2008 & Treat==1)
	compress
	gen Y2009 = (year==2009 & Treat==1)
	gen Y2010 = (year==2010 & Treat==1)
	gen Y2011 = (year==2011 & Treat==1)
	gen Y2012 = (year==2012 & Treat==1)
	gen Y2013 = (year==2013 & Treat==1)
	gen Y2014 = (year==2014 & Treat==1)
	
	
	* v. Saving samples
	compress
	save "$path_base\RegSamp2", replace										// Full country Extended Years
	
	preserve
	keep if kommun==1280 | Lan==7 | Lan==10 | Lan==13 | Lan==6 | Lan==8 	// Main Analysis Extended Years 
	save "$path_base\RegSamp1", replace
		 
	keep if year<2009
	save "$path_base\RegSamp", replace 										// Main Analysis Regular Years
	restore 
	
	keep if year<2009
	save "$path_base\RegSamp3", replace 									// Full Country Regular Years
}
 
* Additional cleaning chunk: Extending the sample with 1995 and 1996 (for which we do have information on individuals in Sweden, but not on their cross-border behavior) to explore extended pre-trend concerns
* This is appended to the main sample in the analysis file when relevant. It is not added to the main sample because of data limitations on the local server. 
{
* Importing Swedish background data
    forvalues j=1(1)2 {
	forvalues i=1995(1)1996{
		odbc load, exec("select * from individ_`i'_20190826")dsn("P0950") clear
		gen year = `i'
		destring Kon, force replace
		keep if Kon == `j'
		destring Kommun, force replace
		keep if Alder<65
		drop if Alder<18
		save "$path_base\Individ_`i'_`j'", replace
		}

* Note - doing this data importation by gender due to memory constraints on local server. They are merged below. 
	use "$path_base\Individ_1995_`j'", clear
	append using "$path_base\Individ_1996_`j'"
	compress

	gen obs = 1
	save "$path_base\Individ1995_1996_`j'", replace
	}
	
* Appending
	use "$path_base\Individ1995_1996_1", clear
	drop Barn18plus AntFlyttTot Sun2000Inr ExamAr Barn0_3 Barn4_6 Barn7_10 Barn11_15 Barn16_17 YrkStalln AstSNI92 KU1Ink KU1YrkStalln KU1AstKommun KU1AstLan KU1InstKod KU1SektorKod KU2Ink KU2YrkStalln KU2AstKommun KU2AstLan KU2InstKod KU2SektorKod KU3Ink KU3YrkStalln KU3AstKommun KU3AstLan KU3InstKod KU3SektorKod KU1AstSNI92 KU2AstSNI92 KU3AstSNI92
	save, replace
	use "$path_base\Individ1995_1996_2", clear
	drop Barn18plus AntFlyttTot Sun2000Inr ExamAr Barn0_3 Barn4_6 Barn7_10 Barn11_15 Barn16_17 YrkStalln AstSNI92 KU1Ink KU1YrkStalln KU1AstKommun KU1AstLan KU1InstKod KU1SektorKod KU2Ink KU2YrkStalln KU2AstKommun KU2AstLan KU2InstKod KU2SektorKod KU3Ink KU3YrkStalln KU3AstKommun KU3AstLan KU3InstKod KU3SektorKod KU1AstSNI92 KU2AstSNI92 KU3AstSNI92
	save, replace
	append using "$path_base\Individ1995_1996_1"

* Adjusting variables 
	ren Alder age
	gen female=(Kon==1)
	ren Kommun kommun

	drop P0950_LopNr_FamId Kon Forsamling AntFlyttKommun SenInvAr Civil CivilAntAr FamStF FamTypF Sun2000niva AntAns AstKommun AstLan AntalSys InstKod SektorKod FInk StudDeltTyp
	
	compress
	
	destring Lan, force replace
	keep if kommun==1280 | Lan==7 | Lan==10 | Lan==13 | Lan==6 | Lan==8
	
* Building treatment variables 
	gen Treat=(kommun==1280)
	gen Y1995 = (year==1995 & Treat==1)
	gen Y1996 = (year==1996 & Treat==1)	

	replace 	LoneInk			= LoneInk * 100		 			
	gen 		SEKtotwage		= LoneInk 
	replace 	SEKtotwage		= 0 if SEKtotwage	== .
	gen 		SEKwork			= (LoneInk > 0 & LoneInk !=. ) 	
	
	
	keep P0950_LopNr_PersonNr Y* SEKwork SEKtotwage Treat year kommun FodelseAr	
	
* Saving the data 
	save "$path_base\Individ1995_1996", replace
}
 
log close
